# Start from an empty global environment.
# NOTE(review): rm(list = ls()) also removes any objects the caller already
# had in the workspace; running the script in a fresh R session is safer.
rm(list = ls())

# NOTE(review): no readxl function appears to be called in the code visible
# here -- confirm whether this attach is still needed.
library(readxl)
library(tidyverse)

# Read the intercoder data from its RDS file.
df <- read_rds("Data/TheOMGDataset/intercoder_data.rds")

# Compare -----------------------------------------------------------------
# Count the rows sharing each (country, final_id) pair and store the count as
# `n_coders`. The grouping is dropped again right away so that `df` is a plain
# (ungrouped) tibble for every later step instead of silently staying grouped.
df <- df |>
  group_by(country, final_id) |>
  mutate(n_coders = n()) |>
  ungroup()

# Share of rows by value of `n_coders` (for inspection only).
prop.table(table(df$n_coders))

# Keep only the cases that have exactly two rows
# (presumably: coded by exactly two coders -- confirm against the data).
df <- df |> filter(n_coders == 2)
ids <- unique(df$final_id)

#' Agreement statistics between the two coding sheets for one variable
#'
#' Reshapes the data so that every `final_id` has one column per value of
#' `sheet`, then cross-tabulates the `Sheet1` column against the `Sheet2`
#' column with `caret::confusionMatrix()`.
#'
#' @param variable_to_compare Name (string) of the column to compare.
#' @param data Data frame holding `final_id`, `sheet` and the column named in
#'   `variable_to_compare`. Defaults to the script-level `df`, so existing
#'   calls of the form `get_cm("x")` behave as before.
#' @return A one-row data frame. On success: the overall statistics reported
#'   by `confusionMatrix()` plus a `variable` column. If `confusionMatrix()`
#'   raises an error: a tibble with `variable` and `error = "Yes"`.
get_cm <- function(variable_to_compare, data = df) {
  tmp <- data[, c("final_id", "sheet", variable_to_compare)]

  # Move the compared column to the fixed name `var` so the reshaping below
  # does not depend on the variable's name.
  tmp$var <- tmp[[variable_to_compare]]
  tmp[[variable_to_compare]] <- NULL

  # Convert to factor before reshaping so that both sheet columns are built
  # from one factor (and therefore carry the same level set).
  tmp$var <- as.factor(tmp$var)
  tmp <- na.omit(tmp)
  tmp <- unique(tmp)

  # One row per final_id, one column per sheet.
  tmp <- tmp |> pivot_wider(names_from = sheet, values_from = var)

  cm <- try(caret::confusionMatrix(tmp$Sheet1, tmp$Sheet2), silent = TRUE)

  # inherits() is safe even if the result carries more than one class.
  if (inherits(cm, "try-error")) {
    return(tibble(variable = variable_to_compare, error = "Yes"))
  }

  stats <- data.frame(t(cm$overall))
  stats$variable <- variable_to_compare
  stats
}

# Run the sheet-vs-sheet comparison for every column of `df` and stack the
# one-row results into a single table (one row per variable).
all_intercoder_stats <- colnames(df) |>
  lapply(get_cm) |>
  bind_rows()




# Drop the variables that are excluded from the reliability summary: an
# explicit list of column names plus every variable whose name contains one
# of the listed fragments.
reliadat <- local({
  excluded_names <- c(
    "sheet", "campaign_name", "id", "country", "ccode", "location_ccode",
    "target_code", "counter_id", "phase",
    "start_date", "start_prec", "end_year...14", "end_month", "end_date",
    "end_prec", "sources", "coder", "Coder", "sgc", "cn",
    "n_coders", "notes", "outcome", "final_id"
  )
  excluded_fragments <- c("name", "start_year", "end_year", "start_month", "_note")

  variable <- all_intercoder_stats$variable
  drop_row <- variable %in% excluded_names |
    grepl(paste(excluded_fragments, collapse = "|"), variable)

  all_intercoder_stats[!drop_row, , drop = FALSE]
})

# Keep only the variables whose comparison did not fail. The `error` column
# exists only when at least one get_cm() call failed; without this guard a
# fully successful run would index with an empty vector and drop every row.
if ("error" %in% names(reliadat)) {
  reliadat <- reliadat[is.na(reliadat$error), , drop = FALSE]
}

# Working copy of the variable name; it is turned into a display name later.
reliadat$Variable <- reliadat$variable

# Assign each variable to a display group based on its name. The rules are
# checked top to bottom and the first match wins; anything unmatched is
# tagged "NotShow".
reliadat$vargroup <- case_when(
  grepl("^nv_|^v_|^strategy_camp", reliadat$Variable) ~ "Violence strategy",
  grepl("atleast_", reliadat$Variable) ~ "Social group participation",
  grepl("dominate", reliadat$Variable) ~ "Social group dominate",
  grepl("originate", reliadat$Variable) ~ "Social group originate",
  grepl("org_", reliadat$Variable) ~ "Org. participation",
  grepl("goal_", reliadat$Variable) ~ "Demand",
  grepl("rel_", reliadat$Variable) ~ "Religion",
  grepl("end_|start_|date_", reliadat$Variable) ~ "Time",
  grepl("coord_leaders", reliadat$Variable) ~ "Other",
  grepl("^peak_", reliadat$Variable) ~ "Other",
  grepl("size_without_petition", reliadat$Variable) ~ "Other",
  grepl("ide_", reliadat$Variable) ~ "Ideology",
  .default = "NotShow"
)


# Remove variables whose name matches any of the patterns below. This has to
# happen before the per-group summaries so they are not counted there.
reliadat <- reliadat[
  !Reduce(
    `|`,
    lapply(
      c("change_note", "demand_note", "e_regionpol_6C", "region", "team"),
      grepl,
      x = reliadat$Variable
    )
  ),
]

# Per-group summaries: number of distinct variables and mean accuracy within
# each `vargroup`. The grouping is removed afterwards so that `reliadat` does
# not stay grouped for the label and plotting steps that follow.
reliadat <- reliadat |>
  group_by(vargroup) |>
  mutate(
    n_vars = length(unique(Variable)),
    mean_accuracy = mean(Accuracy, na.rm = TRUE)
  ) |>
  ungroup()

# Facet strip text: group name, variable count and rounded mean accuracy.
reliadat$grouplabel <- paste0(
  reliadat$vargroup,
  " | Variables: ", reliadat$n_vars,
  " | Mean accuracy: ", round(reliadat$mean_accuracy, 2)
)


# Strip the group prefixes from the variable names, one pattern at a time in
# this fixed order, then turn the remaining underscores into spaces.
reliadat$Variable <- reliadat$Variable |>
  gsub(pattern = "rel_", replacement = "") |>
  gsub(pattern = "atleast_", replacement = "") |>
  gsub(pattern = "dominate_", replacement = "") |>
  gsub(pattern = "originate_", replacement = "") |>
  gsub(pattern = "^org_", replacement = "") |>
  gsub(pattern = "goal_", replacement = "") |>
  gsub(pattern = "^ide_", replacement = "") |>
  gsub(pattern = "^nv_", replacement = "") |>
  gsub(pattern = "^v_", replacement = "") |>
  gsub(pattern = "_", replacement = " ")

# Display labels: sentence-case the cleaned variable name, then expand
# abbreviations and fix capitalisation with a fixed sequence of replacements.
reliadat$Variable_label <- reliadat$Variable |>
  str_to_sentence() |>
  gsub(pattern = "^Id", replacement = "ID") |>
  gsub(pattern = " id", replacement = " ID") |>
  gsub(pattern = "[Cc]code", replacement = "CCODE") |>
  gsub(pattern = "prec$", replacement = "precision") |>
  gsub(pattern = "Civilrights", replacement = "Civil rights") |>
  gsub(pattern = "[Ss]ocialgroup", replacement = "Social group") |>
  gsub(pattern = "Polparty", replacement = "Political party") |>
  gsub(pattern = "^Main ", replacement = "Main: ")

# Replace labels that have a hand-written long form. Matching is on the whole
# label; labels without an entry in the table are left untouched.
reliadat$Variable_label <- local({
  long_form <- c(
    "Demo" = "Democracy",
    "Coord leaders" = "Coordinated leadership",
    "Weapon acq" = "Weapon acquisition",
    "Indwork" = "Industrial workers",
    "Nonindurban" = "Non-industrial urban workers",
    "Pubemp" = "Public employees",
    "Business" = "Business elites",
    "Agrarianelites" = "Agrarian elites",
    "Urb middle class" = "Urban middle class",
    "Relethnic" = "Religious or ethnic",
    "Milemp" = "Military employees",
    "Laborunion" = "Labor union",
    "Militaryvet" = "Military veterans",
    "Womens" = "Women",
    "Militarygovt" = "Regime security forces",
    "Weapon train" = "Weapon training",
    "Civilsociety other" = "Civil society, other",
    "Democracy hr" = "Democracy and HR",
    "Rural" = "Rural workers",
    "Pride antipride campaign" = "Pride-antipride campaign",
    "Strategy camp" = "Campaign strategy",

    # Institutional demands and their "anti" counterparts
    "Civil rights anti" = "Institutional: Anti-civil rights",
    "Civil rights" = "Institutional: Civil rights",
    "Election anti" = "Institutional: Anti-Electoral changes",
    "Election" = "Institutional: Electoral changes",
    "Executive anti" = "Institutional: Anti-constrain executive",
    "Executive" = "Institutional: Constrain executive",
    "Free expression anti" = "Institutional: Anti-freedom of expression",
    "Free expression" = "Institutional: Freedom of expression",
    "Political power anti" = "Institutional: Anti-Political power",
    "Political power" = "Institutional: Political power",

    "Regime anti" = "Anti-regime change",
    "Regime" = "Regime change",

    "Government" = "Remove government",
    "Government anti" = "Anti-remove government",

    "Main" = "Main demand"
  )

  labels <- reliadat$Variable_label
  hit <- match(labels, names(long_form))
  has_long_form <- !is.na(hit)
  labels[has_long_form] <- long_form[hit[has_long_form]]
  labels
})

# Final label touch-ups: move a trailing " anti" to an "Anti-" prefix, keep
# "Main:" in front of that prefix, and spell out the government labels.
reliadat$Variable_label <- local({
  labels <- reliadat$Variable_label

  # "<label> anti" -> "Anti-<label>"
  trailing_anti <- grepl(" anti$", labels)
  labels[trailing_anti] <- gsub("(.*) anti$", "Anti-\\1", labels[trailing_anti])

  # "Anti-Main: <label>" -> "Main: Anti-<label>"
  anti_main <- grepl("^Anti-Main:", labels)
  labels[anti_main] <- gsub("Anti-Main: ", "Main: Anti-", labels[anti_main])

  labels |>
    gsub(pattern = "gov$", replacement = "government") |>
    gsub(pattern = "Government ", replacement = "Government: ") |>
    gsub(pattern = "^Government:", replacement = "Remove government:") |>
    gsub(pattern = "hos only", replacement = "HOS only")
})

# Turn the labels into a factor with a fixed display order: the hand-picked
# labels first, then every "Institutional..." label, then all remaining labels
# in order of first appearance.
reliadat$Variable_label <- local({
  labels <- reliadat$Variable_label

  leading_levels <- c(
    "Democracy", "Autonomy", "Secession", "Regime change", "Anti-regime change",
    "Remove government", "Anti-remove government", "Remove government: HOS only",
    "Remove government: part of cabinet", "Remove government: cabinet",
    "Remove government: any",
    unique(grep("^Institutional", labels, value = TRUE))
  )
  remaining_levels <- setdiff(unique(labels), leading_levels)

  factor(labels, levels = c(leading_levels, remaining_levels))
})

# Sort rows by group, then by the factor order defined above.
reliadat <- reliadat[with(reliadat, order(vargroup, Variable_label)), ]

# Base plot: one point per variable showing its accuracy. The "NotShow" and
# "Time" groups are left out of the figure.
# NOTE(review): xmin/xmax are mapped to the accuracy bounds, but the only
# layer is geom_point(), which does not draw them -- confirm whether an
# interval layer was intended.
p <- reliadat |>
  filter(!vargroup %in% c("NotShow", "Time")) |>
  ggplot(
    aes(
      x = Accuracy,
      y = Variable_label,
      xmax = AccuracyUpper,
      xmin = AccuracyLower
    )
  ) +
  geom_point() +
  labs(x = "Accuracy", y = "") +
  scale_x_continuous(limits = c(0.4, 1), breaks = seq(0, 1, 0.1)) +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    legend.text = element_text(size = 14),
    axis.title = element_text(size = 14),
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 12),
    strip.text = element_text(size = 14),
    strip.background = element_rect(fill = "grey60", color = "grey60"),
    panel.grid.minor = element_blank()
  )


# Two facetted versions of the same plot: facet_grid() supports
# space = "free_y" (panel height follows the number of rows in the panel),
# while facet_wrap() puts the strip label on top of each panel.
p.grid <- p + facet_grid(rows = vars(grouplabel), scales = "free_y", space = "free_y")
p.wrap <- p + facet_wrap(vars(grouplabel), ncol = 1, scales = "free_y")

# Convert both to gtables so their layouts can be edited.
gp.grid <- ggplotGrob(p.grid)
gp.wrap <- ggplotGrob(p.wrap)

# Copy the panel heights of the facet_grid version onto the facet_wrap
# version, giving the wrapped layout proportional panel heights.
wrap_panel_rows <- gp.wrap$layout$t[grepl("panel", gp.wrap$layout$name)]
grid_panel_rows <- gp.grid$layout$t[grepl("panel", gp.grid$layout$name)]
gp.wrap$heights[wrap_panel_rows] <- gp.grid$heights[grid_panel_rows]

# The edited gtable is drawn straight onto a jpeg device (ggsave() is not
# used because the object is no longer a ggplot).
# NOTE(review): the directory is spelled "OUtput" -- on a case-sensitive file
# system this only works if the folder really has that capitalisation.
jpeg("OUtput/FigureE1.jpg", width = 10, height = 13, units = "in", res = 350)
grid::grid.draw(gp.wrap)
dev.off()


